load("~/Documents/MiGASti/Databases/gene_matrix.RData")
metadata <- read.table("~/Documents/MiGASti/Databases/metadata.txt")
#set rownames to Sample
row.names(metadata) <- metadata$Sample 
setwd("~/Documents/MiGASti/Databases")
#exclude samples that did not pass QC filtering
exclude <- read.table("samples2remove.txt")
exclude <- exclude$x
genes_counts_filt = genes_counts[, !colnames(genes_counts) %in% exclude] 
#Excludes the samples from filters. 
#dim(genes_counts_filt)
metadata_filt = metadata[ !(rownames(metadata) %in% exclude), ]
length(metadata_filt)
gencode_30 = read.table("~/Documents/MiGASti/Databases/ens.geneid.gencode.v30")
colnames(gencode_30) = c("ensembl","symbol")
#order metadata and genes counts
genes_counts_ordered <- genes_counts_filt[,rownames(metadata_filt)]
#head(genes_counts_ordered)
all(rownames(metadata_filt) == colnames (genes_counts_ordered)) #TRUE
# Counts, TPM, voom and metadata filtered 
# 496 samples!  
# Filter for genes not expressed: 50 % of the samples! 
# dim(genes_counts_ordered) #18997   496
# dim(metadata_filt) #496  38

Preparing the samples for DEG

#remove uncultured samples
metadata_cultured <- metadata_filt[metadata_filt$Stimulation != "ununstim",]
#check numbers
#dim(metadata_cultured)
#check numbers per stimulation
#table(metadata_filt$Stimulation)
#select only SVZ samples in metadata
metadata_SVZ = metadata_cultured[metadata_cultured$Region=="SVZ",]
#select only SVZ samples in genes counts
genes_counts_SVZ <- genes_counts_ordered[,metadata_SVZ$Sample]
#order metadata and genes_counts
genes_counts_SVZ_ordered <- genes_counts_SVZ[,rownames(metadata_SVZ)]
#check ordering
all(rownames(metadata_SVZ) == colnames (genes_counts_SVZ_ordered)) #TRUE

[1] TRUE

#round counts; deseq2 can only handle integers
genes_counts_SVZ_ordered <- round(genes_counts_SVZ_ordered, digits=0)

#make sure covariate variables are the right format 
#cannot create dds object with numeric values
metadata_SVZ$Donor_id <- as.factor(metadata_SVZ$Donor_id)
metadata_SVZ$age <- as.integer(metadata_SVZ$age)
metadata_SVZ$sex <- as.factor(metadata_SVZ$sex)
metadata_SVZ$Stimulation <- as.factor(metadata_SVZ$Stimulation)
metadata_SVZ$picard_pct_ribosomal_bases = scale(metadata_SVZ$picard_pct_ribosomal_bases)
metadata_SVZ$picard_pct_mrna_bases = scale(metadata_SVZ$picard_pct_mrna_bases)
metadata_SVZ$picard_pct_pf_reads_aligned = scale(metadata_SVZ$picard_pct_pf_reads_aligned)
metadata_SVZ$picard_pct_percent_duplication = scale(metadata_SVZ$picard_pct_percent_duplication)

#adjust for: ~ age + (1|donor_id) + picard_pct_ribosomal_bases + picard_pct_mrna_bases +   picard_pct_percent_duplication + picard_pct_pf_reads_aligned 
table(metadata_SVZ$Stimulation)

ATP IFNy LPS R848 TNFa unstim 1 24 29 24 24 29

DESeq2 of SVZ samples

#createDeSEQ2 object for LPS
dds <- DESeqDataSetFromMatrix(countData = genes_counts_SVZ_ordered,
                              colData = metadata_SVZ,
                              design = ~ age + sex + picard_pct_ribosomal_bases + picard_pct_mrna_bases + picard_pct_percent_duplication + picard_pct_pf_reads_aligned + Stimulation) 
#variable of interest at end of the formula

#Make sure that control group is set as the reference group
dds$Stimulation <- relevel(dds$Stimulation, ref="unstim")
#head(dds)

#filter: CPM > 1 in 50% of the samples 
keep.exp = rowSums(cpm(genes_counts_SVZ_ordered) > 1) >= 0.5*ncol(genes_counts_SVZ_ordered)
dds = dds[keep.exp,]

#Run differential expression 
dds <- DESeq(dds, betaPrior = FALSE)
resultsNames(dds)

[1] “Intercept” “age”
[3] “sex_m_vs_f” “picard_pct_ribosomal_bases”
[5] “picard_pct_mrna_bases” “picard_pct_percent_duplication” [7] “picard_pct_pf_reads_aligned” “Stimulation_ATP_vs_unstim”
[9] “Stimulation_IFNy_vs_unstim” “Stimulation_LPS_vs_unstim”
[11] “Stimulation_R848_vs_unstim” “Stimulation_TNFa_vs_unstim”

DESeq2: LPS vs unstim

Number of differentially expressed genes

# generate results table for LPS vs unstim
res_LPS <- results(dds, name="Stimulation_LPS_vs_unstim")
sum(res_LPS$padj < 0.05, na.rm=TRUE)

[1] 1880

resOrdered_LPS <- res_LPS[order(res_LPS$pvalue),] 
resOrdered_LPS <- as.data.frame(resOrdered_LPS)

Volcano plot LPS vs unstim

head(res_LPS)

log2 fold change (MLE): Stimulation LPS vs unstim Wald test p-value: Stimulation LPS vs unstim DataFrame with 6 rows and 6 columns baseMean log2FoldChange lfcSE stat pvalue ENSG00000000419.12 554.9419 -0.000461996 0.170162 -0.00271504 0.99783371 ENSG00000000457.14 135.5606 0.062906411 0.132462 0.47490159 0.63485712 ENSG00000000460.17 49.4678 -0.410183614 0.150383 -2.72758594 0.00637996 ENSG00000000938.13 832.3143 0.003510167 0.188946 0.01857759 0.98517808 ENSG00000000971.15 24.9222 0.214396564 0.251639 0.85200184 0.39421307 ENSG00000001036.13 897.1695 -0.378546859 0.150965 -2.50752215 0.01215810 padj ENSG00000000419.12 0.9988869 ENSG00000000457.14 0.8056380 ENSG00000000460.17 0.0533008 ENSG00000000938.13 0.9929994 ENSG00000000971.15 0.6297570 ENSG00000001036.13 0.0812815

with(res_LPS, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-2.5,2)))
with(subset(res_LPS, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))

MA plot LPS vs unstim

#The function plotMA shows the log2 fold changes attributable to a given variable over the mean of normalized counts for all the samples in the DESeqDataSet. Points will be colored if the adjusted p value is less than 0.1. Points which fall out of the window are plotted as open triangles pointing either up or down.

plotMA(res_LPS, ylim=c(-2,2))

### TOP differentially expressed genes LPS vs unstim

setDT(resOrdered_LPS, keep.rownames = "ensembl")
resOrdered_LPS <- left_join(resOrdered_LPS, gencode_30, by = "ensembl")
resOrdered_LPS_top = resOrdered_LPS[order(resOrdered_LPS$padj) ,]
setDT(resOrdered_LPS_top, keep.rownames = "ensembl")
resOrdered_LPS_top = resOrdered_LPS_top[, c("ensembl", "symbol", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]
createDT(resOrdered_LPS_top)
write.table(resOrdered_LPS_top, "DEG_LPS_SVZ.txt")

DESeq2: IFNy vs unstim

Number of differentially expressed genes

# generate results table for IFNy vs unstim
res_IFNy <- results(dds, name="Stimulation_IFNy_vs_unstim")
sum(res_IFNy$padj < 0.05, na.rm=TRUE)

[1] 287

resOrdered_IFNy <- res_IFNy[order(res_IFNy$pvalue),] 
resOrdered_IFNy <- as.data.frame(resOrdered_IFNy)

Volcano plot IFNy vs unstim

head(res_IFNy)

log2 fold change (MLE): Stimulation IFNy vs unstim Wald test p-value: Stimulation IFNy vs unstim DataFrame with 6 rows and 6 columns baseMean log2FoldChange lfcSE stat pvalue ENSG00000000419.12 554.9419 0.0920600 0.180667 0.509555 6.10363e-01 ENSG00000000457.14 135.5606 0.6307679 0.140063 4.503467 6.68536e-06 ENSG00000000460.17 49.4678 -0.0925454 0.159215 -0.581259 5.61066e-01 ENSG00000000938.13 832.3143 0.0260403 0.200130 0.130117 8.96474e-01 ENSG00000000971.15 24.9222 0.4923268 0.265965 1.851100 6.41552e-02 ENSG00000001036.13 897.1695 -0.1705399 0.160010 -1.065805 2.86512e-01 padj ENSG00000000419.12 0.936785285 ENSG00000000457.14 0.000661318 ENSG00000000460.17 0.917786781 ENSG00000000938.13 0.983293632 ENSG00000000971.15 0.627312581 ENSG00000001036.13 0.818242000

with(res_IFNy, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-10,10)))
with(subset(res_IFNy, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))

MA plot IFNy vs unstim

plotMA(res_IFNy, ylim=c(-2,2))

TOP differentially expressed genes IFNy vs unstim

setDT(resOrdered_IFNy, keep.rownames = "ensembl")
resOrdered_IFNy <- merge(resOrdered_IFNy, gencode_30, by = "ensembl")
resOrdered_IFNy_top = resOrdered_IFNy[order(resOrdered_IFNy$padj) ,]
setDT(resOrdered_IFNy_top, keep.rownames = "ensembl")
resOrdered_IFNy_top = resOrdered_IFNy_top[, c("ensembl", "symbol", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]
createDT(resOrdered_IFNy_top)
write.table(resOrdered_IFNy_top, "DEG_IFNy_SVZ.txt")

DESeq2: TNFa vs unstim

# generate results table for TNFa vs unstim
res_TNFa <- results(dds, name="Stimulation_TNFa_vs_unstim")
sum(res_TNFa$padj < 0.05, na.rm=TRUE)

[1] 5

resOrdered_TNFa <- res_TNFa[order(res_TNFa$pvalue),] 
resOrdered_TNFa <- as.data.frame(resOrdered_TNFa)

Volcano plot TNFa vs unstim

head(res_TNFa)

log2 fold change (MLE): Stimulation TNFa vs unstim Wald test p-value: Stimulation TNFa vs unstim DataFrame with 6 rows and 6 columns baseMean log2FoldChange lfcSE stat pvalue ENSG00000000419.12 554.9419 0.3180282 0.180096 1.765879 0.0774162 ENSG00000000457.14 135.5606 0.1275026 0.140542 0.907219 0.3642911 ENSG00000000460.17 49.4678 -0.0591154 0.158231 -0.373602 0.7087007 ENSG00000000938.13 832.3143 0.0500461 0.200286 0.249873 0.8026852 ENSG00000000971.15 24.9222 0.5453570 0.265907 2.050931 0.0402737 ENSG00000001036.13 897.1695 -0.1342882 0.159930 -0.839669 0.4010942 padj ENSG00000000419.12 0.485438 ENSG00000000457.14 0.741552 ENSG00000000460.17 0.904549 ENSG00000000938.13 0.939976 ENSG00000000971.15 0.417635 ENSG00000001036.13 0.761969

with(res_TNFa, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-10,10)))
with(subset(res_TNFa, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))

MA plot TNFa vs unstim

plotMA(res_TNFa, ylim=c(-2,2))

### Top differentially expressed genes TNFa vs unstim

setDT(resOrdered_TNFa, keep.rownames = "ensembl")
resOrdered_TNFa <- merge(resOrdered_TNFa, gencode_30, by = "ensembl")
resOrdered_TNFa_top = resOrdered_TNFa[order(resOrdered_TNFa$padj) ,]
setDT(resOrdered_TNFa_top, keep.rownames = "ensembl")
resOrdered_TNFa_top = resOrdered_TNFa_top[, c("ensembl", "symbol", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]
createDT(resOrdered_TNFa_top)
write.table(resOrdered_TNFa_top, "DEG_TNFa_SVZ.txt")

DESeq2: R848 vs unstim

Number of differentially expressed genes

# generate results table for R848 vs unstim
res_R848 <- results(dds, name="Stimulation_R848_vs_unstim")
sum(res_R848$padj < 0.05, na.rm=TRUE)

[1] 696

resOrdered_R848 <- res_R848[order(res_R848$pvalue),] 
resOrdered_R848 <- as.data.frame(resOrdered_R848)

Volcano plot R848 vs unstim

head(res_R848)

log2 fold change (MLE): Stimulation R848 vs unstim Wald test p-value: Stimulation R848 vs unstim DataFrame with 6 rows and 6 columns baseMean log2FoldChange lfcSE stat pvalue ENSG00000000419.12 554.9419 0.271483 0.177854 1.526432 0.1269022 ENSG00000000457.14 135.5606 0.110164 0.138663 0.794469 0.4269224 ENSG00000000460.17 49.4678 -0.232324 0.156956 -1.480188 0.1388230 ENSG00000000938.13 832.3143 0.193817 0.197679 0.980467 0.3268556 ENSG00000000971.15 24.9222 0.234101 0.261671 0.894638 0.3709807 ENSG00000001036.13 897.1695 -0.337953 0.157927 -2.139932 0.0323603 padj ENSG00000000419.12 0.435170 ENSG00000000457.14 0.729514 ENSG00000000460.17 0.451513 ENSG00000000938.13 0.652497 ENSG00000000971.15 0.686429 ENSG00000001036.13 0.221407

with(res_R848, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-10,10)))
with(subset(res_R848, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))

MA plot R848 vs unstim

plotMA(res_R848, ylim=c(-2,2))

TOP differentially expressed genes R848 vs unstim

setDT(resOrdered_R848, keep.rownames = "ensembl")
resOrdered_R848 <- left_join(resOrdered_R848, gencode_30, by = "ensembl")
resOrdered_R848_top = resOrdered_R848[order(resOrdered_R848$padj) ,]
setDT(resOrdered_R848_top, keep.rownames = "ensembl")
resOrdered_R848_top = resOrdered_R848_top[, c("ensembl", "symbol", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]
createDT(resOrdered_R848_top)
write.table(resOrdered_R848_top, "DEG_R848_SVZ.txt")

DESeq2: ATP vs unstim

#only one sample present, so removed this stimulation for further analysis. ### Number of differentially expressed genes

# generate results table for ATP vs unstim
res_ATP <- results(dds, name="Stimulation_ATP_vs_unstim")
sum(res_ATP$padj < 0.05, na.rm=TRUE)

[1] 0

resOrdered_ATP <- res_ATP[order(res_ATP$pvalue),] 
resOrdered_ATP <- as.data.frame(resOrdered_ATP)

Volcano plot ATP vs unstim

head(res_ATP)

log2 fold change (MLE): Stimulation ATP vs unstim Wald test p-value: Stimulation ATP vs unstim DataFrame with 6 rows and 6 columns baseMean log2FoldChange lfcSE stat pvalue ENSG00000000419.12 554.9419 0.8602350 0.671942 1.280223 0.200467 ENSG00000000457.14 135.5606 -0.0908964 0.540682 -0.168114 0.866493 ENSG00000000460.17 49.4678 -0.6129804 0.671191 -0.913272 0.361099 ENSG00000000938.13 832.3143 0.1333548 0.745028 0.178993 0.857943 ENSG00000000971.15 24.9222 0.6755773 0.913079 0.739889 0.459367 ENSG00000001036.13 897.1695 -0.0660882 0.599534 -0.110233 0.912225 padj ENSG00000000419.12 0.999956 ENSG00000000457.14 0.999956 ENSG00000000460.17 0.999956 ENSG00000000938.13 0.999956 ENSG00000000971.15 0.999956 ENSG00000001036.13 0.999956

with(res_ATP, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-10,10)))
with(subset(res_ATP, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))

MA plot ATP vs unstim

plotMA(res_ATP, ylim=c(-2,2))

TOP differentially expressed genes ATP vs unstim

setDT(resOrdered_ATP, keep.rownames = "ensembl")
resOrdered_ATP <- left_join(resOrdered_ATP, gencode_30, by = "ensembl")
resOrdered_ATP_top = resOrdered_ATP[order(resOrdered_ATP$padj) ,]
setDT(resOrdered_ATP_top, keep.rownames = "ensembl")
resOrdered_ATP_top = resOrdered_ATP_top[, c("ensembl", "symbol", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]
createDT(resOrdered_ATP_top)
write.table(resOrdered_ATP_top, "DEG_ATP_SVZ.txt")

#Create genelists with Log2FC < 1 and < -1 for different stimuli

resOrdered_LPS_p <- subset(resOrdered_LPS_top, padj < 0.05)
resOrdered_LPS_LFC <- subset(resOrdered_LPS_p, log2FoldChange > 1 | log2FoldChange < -1)
write.table(resOrdered_LPS_LFC, "LPS_SVZ_FC1.txt")

resOrdered_IFNy_p <- subset(resOrdered_IFNy_top, padj < 0.05)
resOrdered_IFNy_LFC <- subset(resOrdered_IFNy_p, log2FoldChange > 1 | log2FoldChange < -1)
write.table(resOrdered_IFNy_LFC, "IFNy_SVZ_FC1.txt")

resOrdered_TNFa_p <- subset(resOrdered_TNFa, padj < 0.05)
resOrdered_TNFa_LFC <- subset(resOrdered_IFNy_p, log2FoldChange > 1 | log2FoldChange < -1)
write.table(resOrdered_TNFa_LFC, "TNFa_SVZ_FC1.txt")

resOrdered_R848_p <- subset(resOrdered_R848_top, padj < 0.05)
resOrdered_R848_LFC <- subset(resOrdered_R848_p, log2FoldChange > 1 | log2FoldChange < -1)
write.table(resOrdered_R848_LFC, "R848_SVZ_FC1.txt")